**********
*
*  Replication code for "Rebel group formation in Africa: Evidence from a new dataset" 
*  by Janet I. Lewis, in World Development. Prepared 5/2/2023
*
**********


* Note: Both R Version 4.2 and Stata 17 used for analyses below, each is noted as needed 


* African Country Cloropeth Map (Figure 1) - Using R

#Install packages as needed - e.g. install.packages("countrycode")
library(countrycode) #Package for changing between different country codes
library(spData) #Package with Africa Map Data that includes South Sudan
library(tidyverse) #Package for Cleaning and Manipulating Data
library(tmap) # Package for Mapping
library(sf) # Package for encoding spatial vector data

world<-world
rebel<-read.csv("CountryLevelRebelFormationData.csv")
rebel$iso_a2<-countrycode(rebel$ccode,origin="cown",destination = "iso2c")
Africa<-filter(world,continent == "Africa", !is.na(iso_a2))
Africa2<-merge(Africa,rebel,by="iso_a2")
Africa2$COWC<-countrycode(Africa2$iso_a2,origin ="iso2c", destination="cowc")
names(Africa2)[names(Africa2) == "rcode"] <- "Rebel Groups"

sf_use_s2(FALSE)
# Final Map with Labels
A_Map<-tm_shape(Africa2) + tm_polygons("Rebel Groups")+
  tm_layout(main.title = "New Rebel Groups Formed (1997-2015)",main.title.size=1.2,legend.title.size = 1.2,legend.text.size=.8)+
  tm_text("COWC",size = "AREA")
A_Map



* Scatterplot (Figure 2) - using Stata

import delimited "CountryLevelRebelFormationData.csv" 

/* findit lean1
net install gr0002_2.pkg */
set scheme lean1
gen rurallandarea1000=.
replace rurallandarea1000=(rurallandareasqkmaglndtotlruk2)/1000
* need to create logged var and drop zeros to use logarithmic y-axis scale, which is visually cleaner - pos rel holds if not logged (shown in appendix)
gen lnrcode=.
replace lnrcode=log(rcode)  
drop if rcode==0 
gen pos=.
replace pos=8 if country=="Sudan"
replace pos=6 if country=="Eritrea"
replace pos=11.5 if country=="Madagascar"
replace pos=9 if country=="Liberia"
replace pos=12.5 if country=="Tunisia"
replace pos=1 if country=="Cote d'Ivoire"
replace pos=1 if country=="Namibia"
replace pos=3 if country=="Cameroon"
replace pos=3 if country=="Tanzania"
replace pos=9 if country=="Gambia"
replace pos=3 if country=="South Africa"
replace pos=12 if country=="Sierra Leone"
scatter rcode rurallandarea1000, yscale(log) scheme(lean1) mlabel(countryname) mlabv(pos) xtitle(Rural Land Area (thousands of sq. km.)) ytitle(No. of New Rebel Groups Formed (1997-2015)) plotregion(margin(l+9)) ylabel(1 5 10 15 20 25)

clear




* Rebel Group Descriptive Statistics (Table 1) - using Stata

import delimited "RebelGroupFormationAfrica.csv"
gen civilsociety=0
replace civilsociety=1 if party==1 | religious==1 | other_org==1
replace civilsociety=. if party==. & religious==. & other_org==.
sum rural clandestine oil foreign_govt protest_local contestation_local civilsociety military other_rebels no_prior_org large_1yr large_2yr large_3yr



* Rebel group-level regression analysis (Table 2) - using Stata

*Model 1
reg rural protest_local, cluster(country)

*Model 2
reg rural protest_local clandestine foreign_govt oil_diamonds, cluster(country)

*Model 3
reg rural contestation_local, cluster(country)

*Model 4
reg rural contestation_local clandestine foreign_govt oil_diamonds, cluster(country)

*Model 5
reg rural no_prior_org, cluster(country)

*Model 6
reg rural no_prior_org clandestine foreign_govt oil_diamonds, cluster(country)

*Model 7
reg rural civilsociety, cluster(country)

*Model 8
reg rural civilsociety clandestine foreign_govt oil_diamonds, cluster(country)

* checking similar results with logit (noted but not shown in paper)
*Model 1
*logit rural protest_local, cluster(country)
*Model 2
*logit rural protest_local clandestine foreign_govt oil_diamonds, cluster(country)
*Model 3
*logit rural contestation_local, cluster(country)
*Model 4
*logit rural contestation_local clandestine foreign_govt oil_diamonds, cluster(country)
*Model 5
*logit rural no_prior_org, cluster(country)
*Model 6
*logit rural no_prior_org clandestine foreign_govt oil_diamonds, cluster(country)
*Model 7
*logit rural civilsociety, cluster(country)
*Model 8
*logit rural civilsociety clandestine foreign_govt oil_diamonds, cluster(country)





*****
*
* APPENDIX 
*
*******

* Scatterplot (Table 1A) - using Stata


import delimited "CountryLevelRebelFormationData.csv" 
gen rurallandarea1000=.
replace rurallandarea1000=(rurallandareasqkmaglndtotlruk2)/1000
gen pos=.
replace pos=9 if country=="Sudan"
scatter rcode rurallandarea1000,scheme(lean1) mlabel(countryname) mlabv(pos) xtitle(Rural Land Area (thousands of sq. km.)) ytitle(No. of New Rebel Groups Formed (1997-2015)) plotregion(margin(l+9)) ylabel(1 5 10 15 20 25) || lfit rcode rurallandarea1000, leg(off)
clear

* Country-Level Regression Analysis (Table 2A) - using Stata 

import delimited "CountryLevelRebelFormationData.csv" 
gen rurallandarea1000=.
replace rurallandarea1000=(rurallandareasqkmaglndtotlruk2)/1000
gen pop1000=populationtotalsppoptotl/1000
gen lnpop1000=log(pop1000)
gen lnmilitaryexp = log(militaryexpenditureofgdpmsmilxpn)
gen lnaccesstoelectricityrural = log(accesstoelectricityruralofruralp) 

* Model 1
reg rcode rurallandarea1000 
* checking with transformed vars due to skewed distributions
* reg lnrcode rurallandarea1000
* gen lnrurallandarea1000 = log(rurallandarea1000) 
* reg lnrcode lnrurallandarea1000
* still estimated precisely with logged DV and/or logged IV - chose to show non-logged in paper for simple interpretation

* Model 2
reg rcode rurallandarea1000 literacyrateadulttotalofpeopleag lnmilitaryexp lnaccesstoelectricityrural lnpop1000

* Model 3
nbreg rcode rurallandarea1000 
 
* Model 4
nbreg rcode rurallandarea1000 literacyrateadulttotalofpeopleag lnmilitaryexp lnaccesstoelectricityrural lnpop1000

* Estimating quantities of interest
/* ssc install clarify */ 
estsimp nbreg rcode rurallandarea1000 literacyrateadulttotalofpeopleag lnmilitaryexp lnaccesstoelectricityrural lnpop1000
setx mean
simqi, fd(ev) changex(rurallandarea1000 p50 p75)
clear



* Multiple imputation analysis (Table 2A)

* Because each resulting set of imputed datasets will be different, replication materials includes both the code to re-run the imputation and the file of appended imputed datasets used for results shown in the appendix: "MIdata.dta"

* pre-processing - first using Stata:

import delimited "RebelGroupFormationAfrica.csv"

drop formed_month formed_date formed_certain op_planning_location other small_attack_year small_attack_month small_attack_ddate small_attack_certain large_attack_year large_attack_month large_attack_date large_attack_certain civ_attack_year civ_attack_month civ_attack_date civ_attack_certain civ_attack_time civ_attack_time2 former_security current_security former_rebels current_rebels leaders_formergovt leaders_currentgovt viable_negotiate viable_civ viable_merge viable_splinter viable_hide party religious other_org large_2yr large_3yr 

export delimited using "RebelGroupFormationAfricaDroppedVars.csv"
 
 * using R to run Amelia: 

require(Amelia)
library(Amelia)
library(foreign)
mydata <- read.csv("RebelGroupFormationAfricaDroppedVars.csv")
set.seed(123456789)
a.out5 <- amelia(mydata, m = 5, cs = "rcode", ts = NULL, idvars = c("rebel_name", "country", "ccode"), ords =c("rebel_certainty", "UCD", "GTD", "ACLED"), noms =c("clandestine", "goal_take", "goal_secession", "goal_autonomy", "military", "other_rebels", "self_defense", "no_prior_org", "rural", "planning_inside", "foreign_govt", "oil_diamonds", "large_1yr", "protest_local", "contestation_local", "viable", "civilsociety", "planning_border"))
a.out5
write.amelia(obj=a.out5, file.stem = "outdata", format = "dta") 

* preparing for post-MI estimation, back in Stata:

insheet using "outdata.csv", c
gen _id = _n
gen _imp = 0
save "GroupLevelDataMI.dta"
clear

use "outdata1.dta"
destring, replace
gen _id = _n
gen _imp = 1
save "outdata1wVars.dta"

use "outdata2.dta"
destring, replace
gen _id = _n
gen _imp = 2
save "outdata2wVars.dta"

use "outdata3.dta"
destring, replace
gen _id = _n
gen _imp = 3
save "outdata3wVars.dta"

use "outdata4.dta"
destring, replace
gen _id = _n
gen _mj = 4
save "outdata4wVars.dta"

use "outdata5.dta"
destring, replace
gen _id = _n
gen _imp = 5
save  "outdata5wVars.dta"

append using "outdata1wVars.dta" "outdata2wVars.dta" "outdata3wVars.dta" "outdata4wVars.dta" "outdata5wVars.dta", generate(append) force
save as "MIdata.dta"
clear

* Rebel group-level analyses on imputed datasets (Appendix Table A1) - using Stata:

use "MIdata.dta"

mi import flong, m(imp) id(id) imputed(formed_year-contestation_local)

* Model 1
mi estimate: reg rural protest_local, cluster(ccode)

*Model 2
mi estimate: reg rural protest_local  clandestine foreign_govt oil_diamonds, cluster(ccode)

*Model 3
mi estimate: reg rural contestation_local, cluster(country)

*Model 4
mi estimate: reg rural contestation_local oil_diamonds foreign_govt clandestine, cluster(country)

*Model 5
mi estimate: reg rural no_prior_org, cluster(country)

*Model 6
mi estimate: reg rural no_prior_org clandestine oil_diamonds foreign_govt, cluster(country)

*Model 7
mi estimate: reg rural civilsociety, cluster(country)

*Model 8
mi estimate: reg rural civilsociety clandestine oil_diamonds foreign_govt, cluster(country)







